package com.xhj.es.util;

import com.xhj.es.entity.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

public class HtmlParseUtil {

    /**
     * Scrapes Dangdang search results for the given keyword.
     *
     * <p>Fetches {@code http://search.dangdang.com/?key=<keyword>} with a 30-second
     * timeout, then reads every {@code <li>} under the element whose id is
     * {@code search_nature_rg}. Each item is turned into a {@link Content} built from
     * its first image's URL, the text of its first {@code name}-classed element and
     * the text of its first {@code price}-classed element.
     *
     * @param keyword the search term; it is percent-encoded as UTF-8 before being
     *                placed in the query string
     * @return one {@link Content} per result item, in document order; an empty list
     *         when the page has no {@code search_nature_rg} element
     * @throws IllegalArgumentException if {@code keyword} is {@code null}
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static List<Content> ddParse(String keyword) throws IOException {
        if (keyword == null) {
            throw new IllegalArgumentException("keyword must not be null");
        }

        // Encode the keyword so characters such as '&', '#', '+' and spaces are sent
        // as part of the search term instead of being interpreted as URL syntax.
        String url = "http://search.dangdang.com/?key=" + URLEncoder.encode(keyword, "UTF-8");
        Document document = Jsoup.parse(new URL(url), 30000);

        List<Content> contents = new ArrayList<>();

        // getElementById returns null when the id is not in the page; report that as
        // "no results" rather than failing with a NullPointerException.
        Element resultContainer = document.getElementById("search_nature_rg");
        if (resultContainer == null) {
            return contents;
        }

        for (Element li : resultContainer.getElementsByTag("li")) {
            Elements firstImage = li.getElementsByTag("img").eq(0);
            // Prefer the lazy-load attribute; fall back to the plain src when the
            // item's image carries no data-original value.
            String img = firstImage.attr("data-original");
            if (img.isEmpty()) {
                img = firstImage.attr("src");
            }
            String name = li.getElementsByClass("name").eq(0).text();
            String price = li.getElementsByClass("price").eq(0).text();
            contents.add(new Content(img, name, price));
        }
        return contents;
    }
}
